// Web crawler
const http = require("http");
const cheerio = require("cheerio");
const mongod = require('./mongdb');
const request = require("request");
const crawler = {};
const Iconv = require('iconv-lite');
const ObjectID = require('mongodb').ObjectID;
const fs = require('fs');
const path = require('path');

/**
 * Process-wide fallback for promise rejections that nothing handled:
 * prints the rejection reason and stops the process.
 *
 * @param {*} reason - value the promise was rejected with
 */
function reportUnhandledRejection(reason) {
    console.error('unhandledRejection', reason);
    // Exit status 1 signals failure to whatever launched the process.
    process.exit(1);
}

process.on('unhandledRejection', reportUnhandledRejection);

/**
 * Add crawl tasks to the task queue (the 'queue' collection of the 'book' database).
 *
 * NOTE(review): every other call site in this file consumes the storage
 * helper's `add` as a promise; confirm that it also honours the callback
 * passed here, otherwise this promise never settles.
 *
 * @param {*} arr - task record(s), handed unchanged to the storage helper
 * @returns {Promise<boolean>} resolves to `true` once the helper reports success;
 *     rejects with the helper's error when it reports a failure
 */
crawler.queue = async (arr) => {
    return new Promise((resolve, reject) => {
        mongod.mongod.add(arr, 'queue', 'book', (err) => {
            if (err) {
                // Throwing inside this callback would bypass the promise and
                // leave the caller waiting forever, so reject instead.
                reject(err);
                return;
            }
            resolve(true);
        });
    });
};
/**
 * Crawl a novel's index page and bring the 'book' collection of the 'book'
 * database up to date with it.
 *
 * The page is downloaded as raw bytes and decoded from GBK. If no stored book
 * has the same title, the complete record is inserted. Otherwise only the
 * chapters beyond the stored chapter count are appended, in page order.
 *
 * @param {string} url - address of the index page; also used as the prefix of every chapter link
 * @returns {Promise<void>} settles once the database reflects the page; rejects
 *     when the download still fails after the retries or a database call fails
 */
crawler.pageData = async (url) => {
    // Upper bound on download attempts so a dead URL cannot retry forever.
    const MAX_ATTEMPTS = 3;

    // One download attempt. `encoding: null` asks `request` for the undecoded
    // body so that the GBK bytes can be decoded explicitly below.
    const download = () => new Promise((resolve, reject) => {
        const options = {
            url: url,
            headers: {
                'User-Agent': 'request',
                'content-type': 'text/html; charset=gbk'
            },
            encoding: null
        };
        request(options, (err, response, body) => {
            if (err) {
                reject(err);
                return;
            }
            resolve(body);
        });
    });

    let raw;
    for (let attempt = 1; ; attempt++) {
        try {
            raw = await download();
            break;
        } catch (err) {
            // Retry transient failures; the last failure is reported to the caller.
            if (attempt >= MAX_ATTEMPTS) {
                throw err;
            }
        }
    }

    // Transcode the GBK bytes into a JavaScript string before parsing.
    const html = Iconv.decode(raw, 'gbk').toString();
    const $ = cheerio.load(html);

    const title = $('#info h1').text();          // novel title
    const author = $($('#info p')[0]).text();    // first paragraph of #info
    const digest = $('#intro p').text();         // synopsis
    // Only the cover URL is stored; the image itself is not downloaded.
    const img = 'https://www.booktxt.net' + $('#fmimg img').attr('src');

    // The chapter entries are the siblings that follow the last <dt> in #list.
    $('#list dt').last().addClass('section_mark');
    const section = [];
    $('#list .section_mark').nextAll().each((index, e) => {
        section.push({
            name: $(e).text(),
            href: url + $(e).find('a').attr('href')
        });
    });

    const book = {
        title: title,
        author: author,
        digest: digest,
        img: img,
        section: section
    };

    // Decide between inserting a new book and extending a stored one.
    const found = await mongod.mongod.find({ title: title }, 'book', 'book');

    if (found.count) {
        const stored = found.data[0];
        const knownCount = stored.section.length;
        if (section.length > knownCount) {
            // Append the chapters past the stored count, keeping page order.
            for (const chapter of section.slice(knownCount)) {
                stored.section.push(chapter);
            }
            await mongod.mongod.change({ title: title }, stored, 'book', 'book');
        }
    } else {
        console.log('new');
        await mongod.mongod.add(book, 'book', 'book');
    }
};
/**
 * Crawl every chapter of one stored book, one chapter at a time and in the
 * order of the book's chapter list.
 *
 * @param {*} _id - identifier of the book document; wrapped in an ObjectID for the lookup
 * @returns {Promise<void>} settles after the last chapter has been processed;
 *     rejects when the book cannot be found or a lookup/chapter crawl fails
 */
crawler.sectionAll = async (_id) => {
    const found = await mongod.mongod.find({ '_id': new ObjectID(_id) }, 'book', 'book');
    const book = found.data[0];
    if (!book) {
        // Fail with a readable message instead of a TypeError on `.section`.
        throw new Error(`book not found: ${_id}`);
    }

    // Each chapter is awaited before the next starts; the index doubles as
    // the chapter number passed to sectionOne.
    for (const [index, chapter] of book.section.entries()) {
        await crawler.sectionOne(chapter.href, _id, index);
    }
};
/**
 * Crawl a single chapter page and store it in the 'section' collection of
 * the 'book' database, unless a chapter with the same URL is already stored.
 *
 * The page is downloaded as raw bytes and decoded from GBK. Every run of four
 * spaces in the chapter text is replaced with `<br>` before saving.
 *
 * @param {string} url - address of the chapter page; also the key used to detect duplicates
 * @param {*} parentID - identifier of the owning book, stored with the chapter
 * @param {number} no - position of the chapter within the book, stored with the chapter
 * @returns {Promise<void>} settles once the chapter is stored or found to exist;
 *     rejects when the download still fails after the retries or a database call fails
 */
crawler.sectionOne = async (url, parentID, no) => {
    // Upper bound on download attempts so a dead URL cannot retry forever.
    const MAX_ATTEMPTS = 3;

    // Skip chapters that were crawled before.
    const existing = await mongod.mongod.find({ url: url }, 'section', 'book');
    if (existing.data.length) {
        return;
    }

    // One download attempt. `encoding: null` asks `request` for the undecoded
    // body so that the GBK bytes can be decoded explicitly below.
    const download = () => new Promise((resolve, reject) => {
        const options = {
            url: url,
            headers: {
                'User-Agent': 'request',
                'content-type': 'text/html; charset=gbk'
            },
            encoding: null
        };
        request(options, (err, response, body) => {
            if (err) {
                reject(err);
                return;
            }
            resolve(body);
        });
    });

    let raw;
    for (let attempt = 1; ; attempt++) {
        try {
            raw = await download();
            break;
        } catch (err) {
            // Retrying here keeps parentID and no intact; the last failure
            // is reported to the caller.
            if (attempt >= MAX_ATTEMPTS) {
                throw err;
            }
        }
    }

    // Transcode the GBK bytes into a JavaScript string before parsing.
    const $ = cheerio.load(Iconv.decode(raw, 'gbk').toString());
    const title = $('.bookname h1').text();
    // String#replace returns a new string, so the result has to be kept;
    // the global pattern converts every four-space run, not just the first.
    const text = $('#content').text().replace(/ {4}/g, '<br>');

    // Save the chapter in the section collection.
    const section = { 'url': url, title: title, text: text, parentID: parentID, no: no };
    await mongod.mongod.add(section, 'section', 'book');
};

// Export the crawler object together with the functions attached to it in this file.
module.exports = crawler;

